In [1]:
# Shared notebook configuration (presumably display/logging settings) — see notebook_settings.py.
%run "/code/source/notebooks/notebook_settings.py"
import logging
import numpy as np
import helpsk as hlp
from helpsk.utility import read_pickle, Timer
from helpsk.sklearn_eval import MLExperimentResults

import source.config.config as config
from source.service.model_registry import ModelRegistry

logging.info("Running experiment notebook for last run.")
2023-11-10 07:25:59 - INFO     | Running experiment notebook for last run.

Get Latest Experiment Run from MLflow¶

In [2]:
# Connect to the MLflow tracking server and fetch the configured experiment;
# `experiment.last_run` (the most recent run) is used throughout the rest of the notebook.
registry = ModelRegistry(tracking_uri=config.experiment_server_url())
experiment = registry.get_experiment_by_name(exp_name=config.experiment_name())
logging.info(f"Experiment id: {experiment.last_run.exp_id}")
logging.info(f"Experiment name: {experiment.last_run.exp_name}")
logging.info(f"Run id: {experiment.last_run.run_id}")
logging.info(f"Metric(s): {experiment.last_run.metrics}")
2023-11-10 07:25:59 - INFO     | Experiment id: 1
2023-11-10 07:25:59 - INFO     | Experiment name: credit
2023-11-10 07:25:59 - INFO     | Run id: 505363f85fc741ada20bfb109bdee2d7
2023-11-10 07:25:59 - INFO     | Metric(s): {'roc_auc': 0.762641790401041}

Last Run vs Production¶

What is the metric/performance from the model associated with the last run?

In [3]:
logging.info(f"last run metrics: {experiment.last_run.metrics}")
2023-11-10 07:25:59 - INFO     | last run metrics: {'roc_auc': 0.762641790401041}

What is the metric/performance of the model in production?

In [4]:
# Run currently registered as the production model, for comparison against the last run.
production_run = registry.get_production_run(model_name=config.model_name())
logging.info(f"production run metrics: {production_run.metrics}")
2023-11-10 07:25:59 - INFO     | production run metrics: {'roc_auc': 0.7700760706900486}

Last Run¶

In [5]:
# Underlying mlflow Run entity (params, tags, artifact URI) for the last run.
experiment.last_run.mlflow_entity
Out[5]:
<Run: data=<RunData: metrics={'roc_auc': 0.762641790401041}, params={'model__criterion': 'entropy',
 'model__max_depth': '70',
 'model__max_features': '0.1142268477118407',
 'model__max_samples': '0.5483119512487002',
 'model__min_samples_leaf': '8',
 'model__min_samples_split': '12',
 'model__n_estimators': '553',
 'prep__non_numeric__encoder__transformer': "OneHotEncoder(handle_unknown='ignore')",
 'prep__numeric__imputer__transformer': "SimpleImputer(strategy='median')",
 'prep__numeric__pca__transformer': "PCA(n_components='mle')",
 'prep__numeric__scaler__transformer': 'None'}, tags={'mlflow.log-model.history': '[{"run_id": "505363f85fc741ada20bfb109bdee2d7", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2023-11-10 07:25:55.221442", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"predict_fn": "predict", "loader_module": '
                             '"mlflow.sklearn", "python_version": "3.11.6", '
                             '"env": {"conda": "conda.yaml", "virtualenv": '
                             '"python_env.yaml"}}, "sklearn": '
                             '{"pickled_model": "model.pkl", '
                             '"sklearn_version": "1.3.2", '
                             '"serialization_format": "cloudpickle", "code": '
                             'null}}, "model_uuid": '
                             '"2821e66a22c54ba8b9630eef51fe20c6", '
                             '"mlflow_version": "2.8.0", "model_size_bytes": '
                             '2584736}]',
 'mlflow.note.content': '2023_11_10_07_25_30',
 'mlflow.runName': '2023_11_10_07_25_30',
 'mlflow.source.git.commit': 'e4a4afc62a952e1fc94d01102f96b08cd2840540',
 'mlflow.source.name': 'source/entrypoints/cli.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'root',
 'type': 'BayesSearchCV'}>, info=<RunInfo: artifact_uri='/code/mlflow-artifact-root/1/505363f85fc741ada20bfb109bdee2d7/artifacts', end_time=1699601156815, experiment_id='1', lifecycle_stage='active', run_id='505363f85fc741ada20bfb109bdee2d7', run_name='2023_11_10_07_25_30', run_uuid='505363f85fc741ada20bfb109bdee2d7', start_time=1699601130089, status='FINISHED', user_id='root'>, inputs=<RunInputs: dataset_inputs=[]>>

Load Training & Test Data Info¶

In [6]:
def _load_dataset(artifact_name: str):
    """Download one pickled train/test dataset artifact from the last experiment run."""
    return experiment.last_run.download_artifact(artifact_name=artifact_name, read_from=read_pickle)


with Timer("Loading training/test datasets"):
    # The exact train/test split the experiment run was fit and evaluated on.
    X_train = _load_dataset('x_train.pkl')
    X_test = _load_dataset('x_test.pkl')
    y_train = _load_dataset('y_train.pkl')
    y_test = _load_dataset('y_test.pkl')
Timer Started: Loading training/test datasets
Timer Finished (0.01 seconds)
In [7]:
# Sanity-check the dimensions of the train/test splits.
split_summaries = [
    ("training X shape", X_train.shape),
    ("training y length", len(y_train)),
    ("test X shape", X_test.shape),
    ("test y length", len(y_test)),
]
for description, value in split_summaries:
    logging.info(f"{description}: {value}")
2023-11-10 07:25:59 - INFO     | training X shape: (800, 20)
2023-11-10 07:25:59 - INFO     | training y length: 800
2023-11-10 07:25:59 - INFO     | test X shape: (200, 20)
2023-11-10 07:25:59 - INFO     | test y length: 200
In [8]:
np.unique(y_train, return_counts=True)
Out[8]:
(array([0, 1]), array([559, 241]))
In [9]:
# Class balance of the training labels. Compute `np.unique` once rather than
# twice on the same array (the original recomputed it for the denominator).
_, train_label_counts = np.unique(y_train, return_counts=True)
train_y_proportion = train_label_counts / np.sum(train_label_counts)
logging.info(f"balance of y in training: {train_y_proportion}")
2023-11-10 07:25:59 - INFO     | balance of y in training: [0.69875 0.30125]
In [10]:
# Class balance of the test labels. Compute `np.unique` once rather than
# twice on the same array (the original recomputed it for the denominator).
_, test_label_counts = np.unique(y_test, return_counts=True)
test_y_proportion = test_label_counts / np.sum(test_label_counts)
logging.info(f"balance of y in test: {test_y_proportion}")
2023-11-10 07:25:59 - INFO     | balance of y in test: [0.705 0.295]

Cross Validation Results¶

Best Scores/Params¶

In [11]:
# Cross-validation search results (BayesSearchCV) serialized to YAML by the run.
results = experiment.last_run.download_artifact(
    artifact_name='experiment.yaml',
    read_from=MLExperimentResults.from_yaml_file
)
logging.info(f"Best Score: {results.best_score}")
logging.info(f"Best Params: {results.best_params}")
2023-11-10 07:25:59 - INFO     | Best Score: 0.762641790401041
2023-11-10 07:25:59 - INFO     | Best Params: {'model': 'RandomForestClassifier()', 'max_features': 0.1142268477118407, 'max_depth': 70, 'n_estimators': 553, 'min_samples_split': 12, 'min_samples_leaf': 8, 'max_samples': 0.5483119512487002, 'criterion': 'entropy', 'imputer': "SimpleImputer(strategy='median')", 'scaler': 'None', 'pca': "PCA('mle')", 'encoder': 'OneHotEncoder()'}
In [12]:
# Best run from each model-type: rank rows within each model family by mean
# cross-validated ROC-AUC (rank 1 = best), then keep only the top row per family.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
per_model_rank = df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
df["model_rank"] = per_model_rank
df[df["model_rank"] == 1]
Out[12]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda num_leaves imputer scaler pca encoder model_rank
11 1 0.76 0.71 0.81 RandomForestClassifier() NaN 0.11 70.00 553.00 12.00 8.00 0.55 entropy NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder() 1.00
19 2 0.76 0.72 0.80 XGBClassifier() NaN NaN 1.00 896.00 NaN NaN NaN NaN 0.03 8.00 0.80 0.91 0.83 0.00 1.41 NaN SimpleImputer(strategy='median') None None OneHotEncoder() 1.00
0 3 0.76 0.71 0.81 LogisticRegression() NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer() StandardScaler() None OneHotEncoder() 1.00
24 4 0.76 0.73 0.79 LGBMClassifier() NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.51 0.68 NaN 5.68 42.57 50.00 SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder() 1.00
9 6 0.75 0.70 0.81 ExtraTreesClassifier() NaN 0.03 84.00 1088.00 24.00 36.00 0.98 gini NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer() None None OneHotEncoder() 1.00
In [13]:
# Full styled table of all search trials, best first (num_rows=500 to avoid truncation).
results.to_formatted_dataframe(return_style=True,
                               include_rank=True,
                               num_rows=500)
Out[13]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda num_leaves imputer scaler pca encoder
1 0.763 0.711 0.814 RandomForestClassifier() <NA> 0.114 70.000 553.000 12.000 8.000 0.548 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
2 0.760 0.719 0.801 XGBClassifier() <NA> <NA> 1.000 896.000 <NA> <NA> <NA> <NA> 0.029 8.000 0.799 0.906 0.825 0.003 1.411 <NA> SimpleImputer(strategy='median') None None OneHotEncoder()
3 0.759 0.713 0.805 LogisticRegression() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
4 0.757 0.726 0.788 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.511 0.683 <NA> 5.684 42.574 50.000 SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
5 0.756 0.726 0.787 RandomForestClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
6 0.753 0.696 0.811 ExtraTreesClassifier() <NA> 0.030 84.000 1,088.000 24.000 36.000 0.981 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
7 0.752 0.684 0.819 LogisticRegression() 0.001 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
8 0.750 0.711 0.789 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.797 0.700 <NA> 6.654 9.475 381.000 SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
9 0.744 0.708 0.780 RandomForestClassifier() <NA> 0.681 38.000 1,461.000 23.000 10.000 0.553 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
10 0.743 0.690 0.795 ExtraTreesClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
11 0.739 0.670 0.807 LogisticRegression() 23.327 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() None OneHotEncoder()
12 0.738 0.705 0.772 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
13 0.737 0.696 0.779 RandomForestClassifier() <NA> 0.710 15.000 1,493.000 33.000 27.000 0.914 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None PCA('mle') OneHotEncoder()
14 0.731 0.712 0.750 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.598 0.661 <NA> 12.533 35.084 348.000 SimpleImputer(strategy='most_frequent') None None CustomOrdinalEncoder()
15 0.729 0.664 0.794 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.845 0.453 <NA> 16.166 40.978 351.000 SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
16 0.727 0.670 0.785 XGBClassifier() <NA> <NA> 5.000 1,218.000 <NA> <NA> <NA> <NA> 0.115 2.000 0.545 0.648 0.852 0.123 1.165 <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
17 0.726 0.689 0.762 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()
18 0.725 0.699 0.752 RandomForestClassifier() <NA> 0.740 14.000 1,645.000 5.000 43.000 0.741 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None PCA('mle') CustomOrdinalEncoder()
19 0.723 0.694 0.753 XGBClassifier() <NA> <NA> 15.000 1,159.000 <NA> <NA> <NA> <NA> 0.032 29.000 0.834 0.520 0.503 0.003 1.839 <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
20 0.723 0.666 0.779 ExtraTreesClassifier() <NA> 0.857 30.000 879.000 17.000 28.000 0.563 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
21 0.722 0.655 0.790 ExtraTreesClassifier() <NA> 0.672 81.000 1,136.000 34.000 34.000 0.971 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
22 0.722 0.684 0.760 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() PCA('mle') CustomOrdinalEncoder()
23 0.722 0.658 0.786 ExtraTreesClassifier() <NA> 0.781 50.000 590.000 35.000 47.000 0.846 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
24 0.720 0.706 0.733 XGBClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
25 0.714 0.666 0.763 XGBClassifier() <NA> <NA> 3.000 682.000 <NA> <NA> <NA> <NA> 0.152 2.000 0.698 0.940 0.817 0.009 2.086 <NA> SimpleImputer() None PCA('mle') CustomOrdinalEncoder()
In [14]:
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
Out[14]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion imputer pca encoder
1 0.763 0.711 0.814 0.114 70.000 553.000 12.000 8.000 0.548 entropy SimpleImputer(strategy='median') PCA('mle') OneHotEncoder()
2 0.756 0.726 0.787 <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None OneHotEncoder()
3 0.744 0.708 0.780 0.681 38.000 1,461.000 23.000 10.000 0.553 gini SimpleImputer(strategy='median') None CustomOrdinalEncoder()
4 0.737 0.696 0.779 0.710 15.000 1,493.000 33.000 27.000 0.914 gini SimpleImputer(strategy='most_frequent') PCA('mle') OneHotEncoder()
5 0.725 0.699 0.752 0.740 14.000 1,645.000 5.000 43.000 0.741 entropy SimpleImputer(strategy='most_frequent') PCA('mle') CustomOrdinalEncoder()
In [15]:
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
Out[15]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI C imputer scaler pca encoder
1 0.759 0.713 0.805 <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
2 0.752 0.684 0.819 0.001 SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
3 0.739 0.670 0.807 23.327 SimpleImputer(strategy='median') StandardScaler() None OneHotEncoder()
4 0.726 0.689 0.762 0.000 SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()
5 0.722 0.684 0.760 0.000 SimpleImputer(strategy='median') StandardScaler() PCA('mle') CustomOrdinalEncoder()

BayesSearchCV Performance Over Time¶

In [16]:
results.plot_performance_across_trials(facet_by='model').show()
In [17]:
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()

Variable Performance Over Time¶

In [18]:
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()

Scatter Matrix¶

In [19]:
# Disabled — presumably due to render time/size of a 1000x1000 scatter matrix;
# re-enable for deeper pairwise hyperparameter inspection.
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

Variable Performance - Numeric¶

In [20]:
# Score vs each numeric hyperparameter for the RandomForest trials.
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)
In [21]:
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()

Variable Performance - Non-Numeric¶

In [22]:
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()

In [23]:
# Score vs max_features, with max_depth encoded as point size and encoder as color.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)

In [24]:
# Disabled exploratory plot (XGBoost colsample_bytree vs learning_rate) —
# presumably not needed for the current narrative; kept for reference.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
In [25]:
# Disabled variant of the plot above with imputer as the size dimension —
# presumably not needed for the current narrative; kept for reference.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

Last Run - Test Set Performance¶

In [26]:
# Download the fitted model artifact from the last run.
# NOTE(review): unpickling executes arbitrary code — acceptable here only
# because the artifact comes from our own tracking server.
last_model = experiment.last_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle
)
print(type(last_model.model))
<class 'sklearn.pipeline.Pipeline'>
In [27]:
last_model
Out[27]:
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'residence_si...
                                                                             'savings_status',
                                                                             'employment',
                                                                             'personal_status',
                                                                             'other_parties',
                                                                             'property_magnitude',
                                                                             'other_payment_plans',
                                                                             'housing',
                                                                             'job',
                                                                             'own_telephone',
                                                                             'foreign_worker'])])),
                                          ('model',
                                           RandomForestClassifier(criterion='entropy',
                                                                  max_depth=70,
                                                                  max_features=0.1142268477118407,
                                                                  max_samples=0.5483119512487002,
                                                                  min_samples_leaf=8,
                                                                  min_samples_split=12,
                                                                  n_estimators=553,
                                                                  random_state=42))]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'residence_si...
                                                                             'savings_status',
                                                                             'employment',
                                                                             'personal_status',
                                                                             'other_parties',
                                                                             'property_magnitude',
                                                                             'other_payment_plans',
                                                                             'housing',
                                                                             'job',
                                                                             'own_telephone',
                                                                             'foreign_worker'])])),
                                          ('model',
                                           RandomForestClassifier(criterion='entropy',
                                                                  max_depth=70,
                                                                  max_features=0.1142268477118407,
                                                                  max_samples=0.5483119512487002,
                                                                  min_samples_leaf=8,
                                                                  min_samples_split=12,
                                                                  n_estimators=553,
                                                                  random_state=42))]))
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existing_credi...
                                                   'savings_status',
                                                   'employment',
                                                   'personal_status',
                                                   'other_parties',
                                                   'property_magnitude',
                                                   'other_payment_plans',
                                                   'housing', 'job',
                                                   'own_telephone',
                                                   'foreign_worker'])])),
                ('model',
                 RandomForestClassifier(criterion='entropy', max_depth=70,
                                        max_features=0.1142268477118407,
                                        max_samples=0.5483119512487002,
                                        min_samples_leaf=8,
                                        min_samples_split=12, n_estimators=553,
                                        random_state=42))])
ColumnTransformer(transformers=[('numeric',
                                 Pipeline(steps=[('imputer',
                                                  TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                 ('scaler',
                                                  TransformerChooser()),
                                                 ('pca',
                                                  TransformerChooser(transformer=PCA(n_components='mle')))]),
                                 ['duration', 'credit_amount',
                                  'installment_commitment', 'residence_since',
                                  'age', 'existing_credits',
                                  'num_dependents']),
                                ('non_numeric',
                                 Pipeline(steps=[('encoder',
                                                  TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
                                 ['checking_status', 'credit_history',
                                  'purpose', 'savings_status', 'employment',
                                  'personal_status', 'other_parties',
                                  'property_magnitude', 'other_payment_plans',
                                  'housing', 'job', 'own_telephone',
                                  'foreign_worker'])])
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='median'))
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(criterion='entropy', max_depth=70,
                       max_features=0.1142268477118407,
                       max_samples=0.5483119512487002, min_samples_leaf=8,
                       min_samples_split=12, n_estimators=553, random_state=42)
In [28]:
# Score the holdout test set.
# NOTE(review): despite the name, `predict` returns continuous scores in [0, 1]
# (see the sample below), not class labels — confirm against SklearnModelWrapper.
test_predictions = last_model.predict(X_test)
test_predictions[0:10]
Out[28]:
array([0.3733136 , 0.4118129 , 0.48404516, 0.35906406, 0.17344127,
       0.33008237, 0.17795812, 0.40549351, 0.21201897, 0.24426042])
In [29]:
# Two-class evaluation of the last-run model on the holdout test set.
# score_threshold=0.37: classification cutoff applied to the predicted scores —
# TODO(review): document how this threshold was chosen (it is not the default 0.5).
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37
)
In [30]:
evaluator.plot_actual_vs_predict_histogram()
In [31]:
evaluator.plot_confusion_matrix()
No description has been provided for this image
In [32]:
# All classification metrics alongside two dummy-classifier baselines
# ('prior' and 'constant' strategies) for context.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[32]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.794 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.593 0.000 1.000 59.3% of positive instances were correctly identified.; i.e. 35 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.837 1.000 0.000 83.7% of negative instances were correctly identified.; i.e. 118 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.163 0.000 1.000 16.3% of negative instances were incorrectly identified as positive; i.e. 23 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.407 1.000 0.000 40.7% of positive instances were incorrectly identified as negative; i.e. 24 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.603 0.000 0.295 When the model claims an instance is positive, it is correct 60.3% of the time; i.e. out of the 58 times the model predicted "Positive Class", it was correct 35 times; a.k.a precision
Negative Predictive Value 0.831 0.705 0.000 When the model claims an instance is negative, it is correct 83.1% of the time; i.e. out of the 142 times the model predicted "Negative Class", it was correct 118 times
F1 Score 0.598 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.642 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.765 0.705 0.295 76.5% of instances were correctly identified
Error Rate 0.235 0.295 0.705 23.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [33]:
evaluator.plot_roc_auc_curve().show()
In [34]:
evaluator.plot_precision_recall_auc_curve().show()
In [35]:
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [36]:
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [37]:
evaluator.calculate_lift_gain(return_style=True)
/usr/local/lib/python3.11/site-packages/helpsk/sklearn_eval.py:2480: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

Out[37]:
  Gain Lift
Percentile    
5 0.17 3.39
10 0.25 2.54
15 0.32 2.15
20 0.42 2.12
25 0.53 2.10
30 0.61 2.03
35 0.68 1.94
40 0.75 1.86
45 0.76 1.69
50 0.80 1.59
55 0.80 1.45
60 0.88 1.47
65 0.88 1.36
70 0.92 1.31
75 0.93 1.24
80 0.97 1.21
85 0.98 1.16
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00

Production Model - Test Set Performance¶

In [38]:
# Download the fitted model artifact from the production run, to evaluate it
# on the same holdout test set as the last run.
production_model = production_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle
)
print(type(production_model.model))
<class 'sklearn.pipeline.Pipeline'>
In [39]:
production_model
Out[39]:
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'residence_si...
                                                                             'personal_status',
                                                                             'other_parties',
                                                                             'property_magnitude',
                                                                             'other_payment_plans',
                                                                             'housing',
                                                                             'job',
                                                                             'own_telephone',
                                                                             'foreign_worker'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'residence_si...
                                                                             'personal_status',
                                                                             'other_parties',
                                                                             'property_magnitude',
                                                                             'other_payment_plans',
                                                                             'housing',
                                                                             'job',
                                                                             'own_telephone',
                                                                             'foreign_worker'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existing_credi...
                                                   'personal_status',
                                                   'other_parties',
                                                   'property_magnitude',
                                                   'other_payment_plans',
                                                   'housing', 'job',
                                                   'own_telephone',
                                                   'foreign_worker'])])),
                ('model',
                 ExtraTreesClassifier(bootstrap=True, criterion='entropy',
                                      max_depth=99,
                                      max_features=0.031837350792579364,
                                      max_samples=0.9248344222191298,
                                      min_samples_leaf=4, min_samples_split=16,
                                      n_estimators=1235, random_state=42))])
ColumnTransformer(transformers=[('numeric',
                                 Pipeline(steps=[('imputer',
                                                  TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                 ('scaler',
                                                  TransformerChooser()),
                                                 ('pca',
                                                  TransformerChooser(transformer=PCA(n_components='mle')))]),
                                 ['duration', 'credit_amount',
                                  'installment_commitment', 'residence_since',
                                  'age', 'existing_credits',
                                  'num_dependents']),
                                ('non_numeric',
                                 Pipeline(steps=[('encoder',
                                                  TransformerChooser(transformer=CustomOrdinalEncoder()))]),
                                 ['checking_status', 'credit_history',
                                  'purpose', 'savings_status', 'employment',
                                  'personal_status', 'other_parties',
                                  'property_magnitude', 'other_payment_plans',
                                  'housing', 'job', 'own_telephone',
                                  'foreign_worker'])])
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='median'))
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=CustomOrdinalEncoder())
CustomOrdinalEncoder()
CustomOrdinalEncoder()
ExtraTreesClassifier(bootstrap=True, criterion='entropy', max_depth=99,
                     max_features=0.031837350792579364,
                     max_samples=0.9248344222191298, min_samples_leaf=4,
                     min_samples_split=16, n_estimators=1235, random_state=42)
In [40]:
# Score the held-out test set with the production pipeline.
test_predictions = production_model.predict(X_test)
# Peek at the first few values — these appear to be probability scores
# (floats between 0 and 1), not hard class labels; confirm against the wrapper.
test_predictions[:10]
Out[40]:
array([0.32794716, 0.32893009, 0.37430263, 0.3052219 , 0.23870015,
       0.31131838, 0.24759746, 0.33864031, 0.23092148, 0.24922654])
In [41]:
# Evaluate the production model's test-set scores against the true labels.
# NOTE(review): 0.37 is the classification cutoff applied to the scores —
# presumably chosen from the threshold curves below; confirm with model owner.
score_threshold = 0.37
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=score_threshold,
)
In [42]:
# Histogram of predicted scores split by actual class.
# NOTE(review): sibling plot cells call .show(); this one relies on the rich
# repr of the returned figure — confirm it renders under Restart & Run All.
evaluator.plot_actual_vs_predict_histogram()
In [43]:
# Confusion matrix at the configured score threshold.
evaluator.plot_confusion_matrix()
No description has been provided for this image
In [44]:
# Full metrics table (AUC, TPR/TNR, precision, F1, ...) compared against two
# dummy baselines: always-predict-prior and always-predict-constant.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[44]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.781 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.169 0.000 1.000 16.9% of positive instances were correctly identified.; i.e. 10 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.957 1.000 0.000 95.7% of negative instances were correctly identified.; i.e. 135 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.043 0.000 1.000 4.3% of negative instances were incorrectly identified as positive; i.e. 6 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.831 1.000 0.000 83.1% of positive instances were incorrectly identified as negative; i.e. 49 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.625 0.000 0.295 When the model claims an instance is positive, it is correct 62.5% of the time; i.e. out of the 16 times the model predicted "Positive Class", it was correct 10 times; a.k.a precision
Negative Predictive Value 0.734 0.705 0.000 When the model claims an instance is negative, it is correct 73.4% of the time; i.e. out of the 184 times the model predicted "Negative Class", it was correct 135 times
F1 Score 0.267 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.621 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.725 0.705 0.295 72.5% of instances were correctly identified
Error Rate 0.275 0.295 0.705 27.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [45]:
# ROC curve for the production model on the test set.
evaluator.plot_roc_auc_curve().show()
In [46]:
# Precision/recall AUC curve for the production model on the test set.
evaluator.plot_precision_recall_auc_curve().show()
In [47]:
# Metric-vs-threshold curves (0.1-0.7) for the production model.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [48]:
# Precision/recall trade-off (thresholds 0.1-0.6) for the production model.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [49]:
# Cumulative gain/lift table for the production model, by score percentile.
# NOTE(review): same pandas FutureWarning (groupby `observed` default) raised
# from helpsk.sklearn_eval as in the earlier cell — fix belongs upstream.
evaluator.calculate_lift_gain(return_style=True)
/usr/local/lib/python3.11/site-packages/helpsk/sklearn_eval.py:2480: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

Out[49]:
  Gain Lift
Percentile    
5 0.17 3.39
10 0.20 2.03
15 0.34 2.26
20 0.44 2.20
25 0.49 1.97
30 0.56 1.86
35 0.63 1.79
40 0.69 1.74
45 0.76 1.69
50 0.80 1.59
55 0.83 1.51
60 0.85 1.41
65 0.86 1.33
70 0.93 1.33
75 0.95 1.27
80 0.97 1.21
85 0.98 1.16
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00